The packages bellow are necessary to visualize phenology data. Comment lines added to allow the document to knit. If you need the packages installed, uncomment the install.packages lines.
#install.packages("readr")
#install.packages("ggplot2")
#install.packages("tidyr")
#install.packageS("dplyr")
#install.packageS("rjags")
#install.packages("rnoaa")
#install.packages("daymetr")
library(readr)
library(ggplot2)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rjags)
## Loading required package: coda
## Linked to JAGS 4.3.1
## Loaded modules: basemod,bugs
library(rnoaa)
library(daymetr)
#devtools::install_github("EcoForecast/ecoforecastR",force=TRUE)
Read in site data and most up to date phenology data quantified from phenocam data by the ecoforecasting initiative. We are only interested in deciduous broadleaf forests. This can be filtered in the site_data according to the pheno_cam vegetation type. The remaining sites can be used to filter the phenology data.
site_data <- readr::read_csv("https://raw.githubusercontent.com/eco4cast/neon4cast-targets/main/NEON_Field_Site_Metadata_20220412.csv") |>
dplyr::filter(phenology == 1, phenocam_vegetation == "deciduous broadleaf")
## Rows: 81 Columns: 54
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (34): field_domain_id, field_site_id, field_site_name, phenocam_code, ph...
## dbl (20): terrestrial, aquatics, phenology, ticks, beetles, field_latitude, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(site_data)
## # A tibble: 6 × 54
## field_…¹ field…² field…³ terre…⁴ aquat…⁵ pheno…⁶ ticks beetles pheno…⁷ pheno…⁸
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 D01 BART Bartle… 1 0 1 0 1 NEON.D… DB_1000
## 2 D02 BLAN Blandy… 1 0 1 1 1 NEON.D… DB_1000
## 3 D11 CLBJ Lyndon… 1 0 1 0 1 NEON.D… DB_2000
## 4 D08 DELA Dead L… 1 0 1 0 1 NEON.D… DB_1000
## 5 D07 GRSM Great … 1 0 1 0 1 NEON.D… DB_1000
## 6 D01 HARV Harvar… 1 0 1 0 1 NEON.D… DB_1000
## # … with 44 more variables: phenocam_vegetation <chr>, field_site_type <chr>,
## # field_site_subtype <chr>, field_colocated_site <chr>,
## # field_site_host <chr>, field_site_url <chr>,
## # field_nonneon_research_allowed <chr>, field_access_details <chr>,
## # field_neon_field_operations_office <chr>, field_latitude <dbl>,
## # field_longitude <dbl>, field_geodetic_datum <chr>,
## # field_utm_northing <dbl>, field_utm_easting <dbl>, field_utm_zone <chr>, …
NeonPheno =readr::read_csv("https://data.ecoforecast.org/neon4cast-targets/phenology/phenology-targets.csv.gz", guess_max = 1e6) |>
na.omit()
## Rows: 231804 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): site_id, variable
## dbl (1): observation
## date (1): datetime
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(NeonPheno)
## # A tibble: 6 × 4
## datetime site_id variable observation
## <date> <chr> <chr> <dbl>
## 1 2017-05-30 ABBY gcc_90 0.417
## 2 2017-05-31 ABBY gcc_90 0.416
## 3 2017-06-01 ABBY gcc_90 0.418
## 4 2017-06-02 ABBY gcc_90 0.415
## 5 2017-06-03 ABBY gcc_90 0.422
## 6 2017-06-04 ABBY gcc_90 0.417
Pheno_decid = NeonPheno %>% #Filter sites accord to deciduous broadleaf
filter(site_id %in% unique(site_data$field_site_id))
Pheno_seasons = Pheno_decid %>% #Add season variable
mutate(season = case_when(lubridate::month(datetime) <= 6 ~ "spring",
lubridate::month(datetime) >= 7 ~ "fall"))
Use ggplot to visualize rcc and gcc data during spring green up and fall color change and leaf drop. Can plot across years or choose a single year
Pheno_seasons %>%
filter(lubridate::year(datetime)== 2021) %>% #choosing 2021
filter(season == "spring") %>% #spring
filter(variable == "gcc_90") %>% #only interested in greeness
ggplot(aes(x=datetime,y=observation)) +
geom_point(color="#228b22",size=0.75) +
theme_classic() +
labs(title="Spring phenology", x = "Date", y= "gcc_90") +
facet_wrap(~site_id,ncol=5) +
theme(strip.background=element_blank(),panel.spacing=unit(1,"lines"))
Pheno_seasons %>%
filter(lubridate::year(datetime)== 2021) %>% #2021 YEAR
filter(season == "fall") %>% # Fall
ggplot(aes(x=datetime,y=observation,shape=variable)) + #Plot greeness and redness
geom_point(aes(color=variable),size=0.75) +
scale_color_manual(values = c("#daa520","#228b22")) +
theme_classic() +
facet_wrap(~site_id,ncol=5) +
labs(title="Fall phenology", x = "Date", y= "Reflectance value",color="Reflectance index",shape="Reflectance index") +
theme(strip.background=element_blank(),panel.spacing = unit(1,"lines"))
Code derived from exercise six of the Ecological Forecasting
#label data
levels(as.factor(Pheno_decid$site_id))
## [1] "BART" "BLAN" "CLBJ" "DELA" "GRSM" "HARV" "LENO" "MLBS" "ORNL" "SCBI"
## [11] "SERC" "STEI" "TREE" "UKFS" "UNDE"
GRSM_pheno = NeonPheno %>% filter(site_id=="GRSM")
time = (subset(GRSM_pheno, GRSM_pheno$variable == 'gcc_90'))$datetime
y = (subset(GRSM_pheno, GRSM_pheno$variable == 'gcc_90'))$observation
#define model
RandomWalk = "
model{
#### Data Model
for(t in 1:n){
y[t] ~ dnorm(x[t],tau_obs)
}
#### Process Model
for(t in 2:n){
x[t]~dnorm(x[t-1],tau_add) #Dependent on observation at previous time point
}
#### Initial condition
x[1] ~ dnorm(x_ic,tau_ic)
#### Priors
tau_obs ~ dgamma(a_obs,r_obs)
tau_add ~ dgamma(a_add,r_add)
}
"
data <- list(y=y,n=length(y), ## data
x_ic=log(1000),tau_ic=100, ## initial condition prior
a_obs=1,r_obs=1, ## obs error prior
a_add=1,r_add=1 ## process error prior
)
#set parameters for mcmc
nchain = 3
init <- list()
for(i in 1:nchain){
y.samp = sample(y,length(y),replace=TRUE)
init[[i]] <- list(tau_add=1/var(diff(log(y.samp))), ## initial guess on process precision
tau_obs=5/var(log(y.samp))) ## initial guess on obs precision
}
#actually define model as a variable
j.model <- jags.model (file = textConnection(RandomWalk),
data = data,
inits = init,
n.chains = 3)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 2051
## Unobserved stochastic nodes: 2053
## Total graph size: 4111
##
## Initializing model
## burn-in
jags.burn <- coda.samples (model = j.model,
variable.names = c("tau_add","tau_obs"),
n.iter = 1000)
plot(jags.burn)
jags.out <- coda.samples (model = j.model,
variable.names = c("x","tau_add","tau_obs"),
n.iter = 10000)
plot(jags.out[,c(1,2)]) #Plot after burn in
#plotting the results
time.rng = c(1,length(time)) ## adjust to zoom in and out
out.grsm <- as.matrix(jags.out) ## convert from coda to matrix
x.cols.grsm <- grep("^x",colnames(out.grsm)) ## grab all columns that start with the letter x
ci.grsm <- apply(out.grsm[,x.cols.grsm],2,quantile,c(0.025,0.5,0.975)) ## model was fit on log scale
plot(time,ci.grsm[2,],type='n',ylim=c(0.2,0.6),ylab="GCC_90",xlim=time[time.rng],main="GRSM",xlab="Date")
## adjust x-axis label to be monthly if zoomed
if(diff(time.rng) < 100){
axis.Date(1, at=seq(time[time.rng[1]],time[time.rng[2]],by='month'), format = "%Y-%m")
}
ecoforecastR::ciEnvelope(time,ci.grsm[1,],ci.grsm[3,],col=ecoforecastR::col.alpha("lightBlue",0.75))
points(time,y,pch="+",cex=0.5)
pheno.decid.gcc90 = Pheno_decid %>% filter(variable=="gcc_90")
pheno.wide = pheno.decid.gcc90 %>% pivot_wider(names_from="site_id",values_from="observation")
pheno.2021 = pheno.wide %>% filter(datetime>'2020-12-31')
time = pheno.2021$datetime
pheno.2021 = pheno.2021[,-c(1,2)]
RandomWalk_multiplesites = "model{
#loop over all sites
for (s in 1:ns){
#### Data Model
for(i in 1:n){
y[i,s] ~ dnorm(x[i,s],tau_obs)
}
#### Process Model
for(i in 2:n){
x[i,s]~dnorm(x[i-1,s],tau_add)
}
## initial condition
x[1,s] ~ dnorm(x_ic,tau_ic)
}
#### Priors
tau_obs ~ dgamma(a_obs,r_obs)
tau_add ~ dgamma(a_add,r_add)
}"
data <- list(y=as.matrix(pheno.2021),n=nrow(pheno.2021),
ns = ncol(pheno.2021),
x_ic=log(1000),tau_ic=100, ## initial condition prior
a_obs=1,r_obs=1, ## obs error prior
a_add=1,r_add=1 ## process error prior
)
#set parameters for mcmc
nchain = 3
init <- list()
for(i in 1:nchain){
y.samp = sample(y,length(y),replace=TRUE)
init[[i]] <- list(tau_add=1/var(diff(log(y.samp))), ## initial guess on process precision
tau_obs=5/var(log(y.samp))) ## initial guess on obs precision
}
#actually define model as a variable
s.model <- jags.model (file = textConnection(RandomWalk_multiplesites),
data = data,
inits = init,
n.chains = 3)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 9895
## Unobserved stochastic nodes: 10117
## Total graph size: 20020
##
## Initializing model
jags.s.burn <- coda.samples (model = s.model,
variable.names = c("tau_add","tau_obs"),
n.iter = 1000)
plot(jags.s.burn)
jags.s.out <- coda.samples (model = s.model,
variable.names = c("x","tau_add","tau_obs"),
n.iter = 10000)
plot(jags.s.out[,c(1,2)])
#### Helper function to parse JAGS variable names that include matrix syntax (e.g. "x[40,13]")
##' @param w mcmc object containing matrix outputs
##' @param pre prefix (variable name) for the matrix variable to be extracted
##' @param numeric boolean, whether to coerce class to numeric
parse.MatrixNames <- function(w, pre = "x", numeric = FALSE) {
w <- sub(pre, "", w)
w <- sub("[", "", w, fixed = TRUE)
w <- sub("]", "", w, fixed = TRUE)
w <- matrix(unlist(strsplit(w, ",")), nrow = length(w), byrow = TRUE)
if (numeric) {
class(w) <- "numeric"
}
colnames(w) <- c("row", "col")
return(as.data.frame(w))
} # parse.MatrixNames
out.s <- as.matrix(jags.s.out)
x.cols.s = which(substr(colnames(out.s),1,1)=="x") ## which columns are the state variable x
ci.s <- apply(out.s[,x.cols.s],2,quantile,c(0.025,0.5,0.975)) #Credible interval
ci.names.s = parse.MatrixNames(colnames(ci.s),numeric=TRUE)
ci.t = t(ci.s) #Transpose credible intervals to parse visually if needed.
#layout(matrix(1:15,3,5)) ## arrange plots in a 5 x 3 matrix
## plot sample of GCC_90 timeseries
for(i in 1:data$ns){
sel = which(ci.names.s$col == i)
plot(time,ci.t[sel,2],type='n',ylim=c(-0.25,1),ylab="gcc_90",main=colnames(data$y)[i],xlab="Date")
ecoforecastR::ciEnvelope(x=time,ylo=ci.t[sel,1],yhi=ci.t[sel,3],col="lightBlue") #Something odd is happening here and switching the color of the credible intervals: light blue, and then white just before 2022 starts
points(time,data$y[,i],pch="+",cex=1.5)
}